load all dependancies
source("analysis.r")
load seurat object from ss2_neur/glia sub-clustering results in previous page
because ss2_glia has only 3 sub-clusters, not as complicated as ss2_neur's, first to do ss2_glia
ss2_glia.seur <- readRDS("ss2_glia.seur.rds")
before differential analysis, ..., in fact, did that directly more than once then found sth. interesting,
ss2_Glia_1, no matter inpaper or inlocal(99% matched anyway), may further contain at least two distinct sub-clusters,
just as that one of the three marker genes shows the difference (though they are not that unique).
multiplot(
DimPlot(ss2_glia.seur, group.by = 'Annotation', label = T) + labs(title = "ss2_glia inpaper"),
DimPlot(ss2_glia.seur, group.by = 'inlocal', label = T) + labs(title = "ss2_glia inlocal"),
FeaturePlot(ss2_glia.seur, c("Slc18a2")) + coord_fixed(0.88), cols = 1
)
could also find a few unique (not 100% clean, just visually obvious) identifiers on the top empty part of Glia_1,
like "Rxrg","Xylt1","Pappa" and so on.
multiplot(
FeaturePlot(ss2_glia.seur, c("Rxrg")) + coord_fixed(0.88),
FeaturePlot(ss2_glia.seur, c("Xylt1")) + coord_fixed(0.88),
FeaturePlot(ss2_glia.seur, c("Pappa")) + coord_fixed(0.88), cols = 1
)
and the nGene distribution also get a difference.
DimPlot(ss2_glia.seur, group.by = 'nGene', cols = material.heat(1909)) + labs(title = "Number of genes") +
guides(colour = guide_coloursteps(show.limits = TRUE,label=FALSE)) + coord_fixed(0.88)
sorry for still unavailable to add scale mark on the colorbar because of the conflicts of data formats inside.
lightblue should be >3500, and orange still <6000
ss2_glia's nGene distribution
cat("nGene quantile statistics of ss2_glia: \n" ); quantile(as.numeric(ss2_glia.seur@meta.data$nGene), probs=seq(0,1,0.05))
## nGene quantile statistics of ss2_glia:
## 0% 5% 10% 15% 20% 25% 30% 35% 40% 45% 50%
## 566.0 3321.1 3663.8 3944.0 4123.6 4294.5 4412.4 4533.6 4643.2 4739.1 4842.0
## 55% 60% 65% 70% 75% 80% 85% 90% 95% 100%
## 4930.0 5021.8 5115.4 5211.6 5315.5 5421.0 5550.0 5718.0 5936.0 8069.0
h <- hist(as.numeric(ss2_glia.seur@meta.data$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = T)
h1 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h2 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h3 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene), breaks = 20, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
d <- density(as.numeric(ss2_glia.seur@meta.data$nGene))
d1 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene))
d2 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene))
d3 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene))
title(paste0("nGene distribtusion for Glia_1(red), Glia_2(green), Glia_3(blue)"))
lines(x = d1$x, y=d1$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene))* diff(h1$breaks)[1],
col="red1",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d2$x, y=d2$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene))* diff(h2$breaks)[1],
col="green3",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d3$x, y=d3$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene))* diff(h3$breaks)[1],
col="blue3",lwd=3, xlim = c(0,8000), ylim = c(0,400))
legend(x=0,y=400,c("Glia_1","Glia_2","Glia_3"),lty = 1,lwd=2,
col=c("red","green","blue"))
is this nGene distribution a cause to drive the clustering or it's just somehow related but not the main reason ?
try to sub-cluster Glia_1.
comparing to complicated sub-clustering results of ss2_neur's:
level 1, into Chat+ and Nos1+,
level 2, into PEMNs/PIMNs/PINs/PSNs/PSVNs,
level 3, each again into 2-7 sub-clusters,
ss2_glia just partition into three parts roughly with big 'k'NN and no further biological things to talk about.
maybe it's just because ENS glia lack of existing knowledge, on the other hand,
indeed it's less complicated for having less varible PCs.
batch info
cat("inlocal ss2_Glia_1 in each Mouse: \n");
## inlocal ss2_Glia_1 in each Mouse:
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID)[paste0("M",1:29)]
##
## M1 M2 M3 M4 M5 M6 M7 M8 M9 M10 M11 M12 M13 <NA> M15 M16
## 7 9 4 13 16 13 103 66 67 64 65 74 62 4 92
## M17 M18 M19 M20 M21 M22 M23 M24 M25 M26 M27 M28 M29
## 1 57 1 37 59 4 81 2 1 27 48 40 57
min_cells <- 4
cat("ss2_Glia_1 mice: ",length(table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID)),
"\nss2_mix count: ",sum(table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID)),
paste0("\n\nmin_cell < ",min_cells," : ",
"\n mice ",sum(table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID) < min_cells),
"\n count ",sum((table((ss2_glia.seur@meta.data %>%
filter(inlocal == "Glia_1"))$Mouse_ID))[table((ss2_glia.seur@meta.data %>%
filter(inlocal == "Glia_1"))$Mouse_ID) < min_cells])))
## ss2_Glia_1 mice: 28
## ss2_mix count: 1074
##
## min_cell < 4 :
## mice 4
## count 5
sub-clustering test
#
batch_use.Glia_1 <- (ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID
names(batch_use.Glia_1) <- rownames(ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))
batch_use.Glia_1 <- factor(batch_use.Glia_1)
#
#ss2_Glia_1.seur.test <- run_analysis(name='ss2_Glia_1.test',
# counts=ss2_glia.seur@assays[['RNA']]@counts[,rownames(ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))],
# do.batch=TRUE,
# batch.use=batch_use.Glia_1,
# min_cells=4,
# var_regex='^Cd|^Ig|^Rn|^mt|^Rp',
# num_pcs=9,
# clust_pcs=6,
# k=100)
# same issue should concern
#ss2_Glia_1.seur.test$orig.ident <- factor("ss2_Glia_1.test")
#ss2_Glia_1.seur.test@active.ident <- factor("ss2_Glia_1.test")
#saveRDS(ss2_Glia_1.seur.test,"ss2_Glia_1.seur.rds")
ss2_Glia_1.seur.test <- readRDS("ss2_Glia_1.seur.rds")
Check PCA and tsne
PCs 1-6 should be fine
ElbowPlot(ss2_Glia_1.seur.test,ndims = 9)
PCs
DimPlot(ss2_Glia_1.seur.test, reduction = 'pca', dims = c(1,2))
DimPlot(ss2_Glia_1.seur.test, reduction = 'pca', dims = c(1,3))
DimPlot(ss2_Glia_1.seur.test, reduction = 'tsne')
how did k100 do to cluster Glia_1,
might be a little more to mixd one or two but not so sure about that
DimPlot(ss2_Glia_1.seur.test, group.by = 'phenograph.k100')
well then try to range a series of 'k'NNs, k=20 to 300,
for convinient storage of variables, use a 'List' object to store all the results
#
#batch_use.Glia_1 <- (ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID
#names(batch_use.Glia_1) <- rownames(ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))
#batch_use.Glia_1 <- factor(batch_use.Glia_1)
#
#ss2_Glia_1.seur.range <- list(NULL)
#for(kkk in 20*(1:15)){
# ss2_Glia_1.seur.test <- run_analysis(name='ss2_Glia_1.test',
# counts=ss2_glia.seur@assays[['RNA']]@counts[,rownames(ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))],
# do.batch=TRUE,
# batch.use=batch_use.Glia_1,
# min_cells=4,
# var_regex='^Cd|^Ig|^Rn|^mt|^Rp',
# num_pcs=9,
# clust_pcs=6,
# k=kkk)
# same issue should concern
# ss2_Glia_1.seur.test$orig.ident <- factor("ss2_Glia_1.test")
# ss2_Glia_1.seur.test@active.ident <- factor("ss2_Glia_1.test")
#
# ss2_Glia_1.seur.range <- c(ss2_Glia_1.seur.range, ss2_Glia_1.seur.test)
#}
#rm(ss2_Glia_1.seur.test)
#
#saveRDS(ss2_Glia_1.seur.range, "ss2_Glia_1.seur.range.rds")
ss2_Glia_1.seur.range <- readRDS("ss2_Glia_1.seur.range.rds")
k=20:300 in 'List' '[[2]]' - '[[16]]'
ss2_Glia_1.seur.range
## [[1]]
## NULL
##
## [[2]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1468 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[3]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1467 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[4]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1468 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[5]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1467 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[6]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1468 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[7]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1468 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[8]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1466 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[9]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1469 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[10]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1468 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[11]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1466 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[12]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1467 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[13]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1466 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[14]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1467 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[15]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1468 variable features)
## 2 dimensional reductions calculated: pca, tsne
##
## [[16]]
## An object of class Seurat
## 18482 features across 1074 samples within 1 assay
## Active assay: RNA (18482 features, 1467 variable features)
## 2 dimensional reductions calculated: pca, tsne
how about 'k'NN 300-20 look like
multiplot(
DimPlot(ss2_Glia_1.seur.range[[16]], group.by = 'phenograph.k300') + labs(title="k300"),
DimPlot(ss2_Glia_1.seur.range[[15]], group.by = 'phenograph.k280') + labs(title="k280"),
DimPlot(ss2_Glia_1.seur.range[[14]], group.by = 'phenograph.k260') + labs(title="k260"),
DimPlot(ss2_Glia_1.seur.range[[13]], group.by = 'phenograph.k240') + labs(title="k240"),
DimPlot(ss2_Glia_1.seur.range[[12]], group.by = 'phenograph.k220') + labs(title="k220"),
DimPlot(ss2_Glia_1.seur.range[[11]], group.by = 'phenograph.k200') + labs(title="k200"),
DimPlot(ss2_Glia_1.seur.range[[10]], group.by = 'phenograph.k180') + labs(title="k180"),
DimPlot(ss2_Glia_1.seur.range[[9]], group.by = 'phenograph.k160') + labs(title="k160"),
DimPlot(ss2_Glia_1.seur.range[[8]], group.by = 'phenograph.k140') + labs(title="k140"),
DimPlot(ss2_Glia_1.seur.range[[7]], group.by = 'phenograph.k120') + labs(title="k120"),
DimPlot(ss2_Glia_1.seur.range[[6]], group.by = 'phenograph.k100') + labs(title="k100"),
DimPlot(ss2_Glia_1.seur.range[[5]], group.by = 'phenograph.k80') + labs(title="k80"),
DimPlot(ss2_Glia_1.seur.range[[4]], group.by = 'phenograph.k60') + labs(title="k60"),
DimPlot(ss2_Glia_1.seur.range[[3]], group.by = 'phenograph.k40') + labs(title="k40"),
DimPlot(ss2_Glia_1.seur.range[[2]], group.by = 'phenograph.k20') + labs(title="k20"),
cols = 5)
plot them back to ss2_glia
add meta.data
ss2_glia.seur@meta.data$K300 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[16]]@meta.data),]$K300 <- ss2_Glia_1.seur.range[[16]]@meta.data$phenograph.k300
ss2_glia.seur@meta.data$K280 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[15]]@meta.data),]$K280 <- ss2_Glia_1.seur.range[[15]]@meta.data$phenograph.k280
ss2_glia.seur@meta.data$K260 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[14]]@meta.data),]$K260 <- ss2_Glia_1.seur.range[[14]]@meta.data$phenograph.k260
ss2_glia.seur@meta.data$K240 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[13]]@meta.data),]$K240 <- ss2_Glia_1.seur.range[[13]]@meta.data$phenograph.k240
ss2_glia.seur@meta.data$K220 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[12]]@meta.data),]$K220 <- ss2_Glia_1.seur.range[[12]]@meta.data$phenograph.k220
ss2_glia.seur@meta.data$K200 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[11]]@meta.data),]$K200 <- ss2_Glia_1.seur.range[[11]]@meta.data$phenograph.k200
ss2_glia.seur@meta.data$K180 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[10]]@meta.data),]$K180 <- ss2_Glia_1.seur.range[[10]]@meta.data$phenograph.k180
ss2_glia.seur@meta.data$K160 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[9]]@meta.data),]$K160 <- ss2_Glia_1.seur.range[[9]]@meta.data$phenograph.k160
ss2_glia.seur@meta.data$K140 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[8]]@meta.data),]$K140 <- ss2_Glia_1.seur.range[[8]]@meta.data$phenograph.k140
ss2_glia.seur@meta.data$K120 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[7]]@meta.data),]$K120 <- ss2_Glia_1.seur.range[[7]]@meta.data$phenograph.k120
ss2_glia.seur@meta.data$K100 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[6]]@meta.data),]$K100 <- ss2_Glia_1.seur.range[[6]]@meta.data$phenograph.k100
ss2_glia.seur@meta.data$K80 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[5]]@meta.data),]$K80 <- ss2_Glia_1.seur.range[[5]]@meta.data$phenograph.k80
ss2_glia.seur@meta.data$K60 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[4]]@meta.data),]$K60 <- ss2_Glia_1.seur.range[[4]]@meta.data$phenograph.k60
ss2_glia.seur@meta.data$K40 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[3]]@meta.data),]$K40 <- ss2_Glia_1.seur.range[[3]]@meta.data$phenograph.k40
ss2_glia.seur@meta.data$K20 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[2]]@meta.data),]$K20 <- ss2_Glia_1.seur.range[[2]]@meta.data$phenograph.k20
plot with old Glia_2/3
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300') + labs(title="K300"),
DimPlot(ss2_glia.seur, group.by = 'K280') + labs(title="K280"),
DimPlot(ss2_glia.seur, group.by = 'K260') + labs(title="K260"),
DimPlot(ss2_glia.seur, group.by = 'K240') + labs(title="K240"),
DimPlot(ss2_glia.seur, group.by = 'K220') + labs(title="K220"),
DimPlot(ss2_glia.seur, group.by = 'K200') + labs(title="K200"),
DimPlot(ss2_glia.seur, group.by = 'K180') + labs(title="K180"),
DimPlot(ss2_glia.seur, group.by = 'K160') + labs(title="K160"),
DimPlot(ss2_glia.seur, group.by = 'K140') + labs(title="K140"),
DimPlot(ss2_glia.seur, group.by = 'K120') + labs(title="K120"),
DimPlot(ss2_glia.seur, group.by = 'K100') + labs(title="K100"),
DimPlot(ss2_glia.seur, group.by = 'K80') + labs(title="K80"),
DimPlot(ss2_glia.seur, group.by = 'K60') + labs(title="K60"),
DimPlot(ss2_glia.seur, group.by = 'K40') + labs(title="K40"),
DimPlot(ss2_glia.seur, group.by = 'K20') + labs(title="K20"),
cols=5
)
visually, clustering into 2-4 sub-clusters could be fine,
but genes found upstairs tell us more possible here to choose K300 or K280 with nearly the same result
(next if find DEGs that would get more clusters, back here to use other 'K's)
let's call "0" Glia_1.0 and "1" Glia_1.1
#ss2_glia.seur <- readRDS("ss2_glia.seur.rds")
#ss2_glia.seur@meta.data$K300 <- ss2_glia.seur@meta.data$inlocal
#ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[16]]@meta.data),]$K300 <- ss2_Glia_1.seur.range[[16]]@meta.data$phenograph.k300
#ss2_glia.seur@meta.data$K300[ss2_glia.seur@meta.data$K300=="0"] <- "Glia_1.0"
#ss2_glia.seur@meta.data$K300[ss2_glia.seur@meta.data$K300=="1"] <- "Glia_1.1"
#saveRDS(ss2_glia.seur,"ss2_glia_new.seur.rds")
ss2_glia.seur <- readRDS("ss2_glia_new.seur.rds")
h <- hist(as.numeric(ss2_glia.seur@meta.data$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = T)
h1 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h2 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h3 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene), breaks = 20, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
d <- density(as.numeric(ss2_glia.seur@meta.data$nGene))
d1 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene))
d2 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene))
d3 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene))
title(paste0("nGene distribtusion for Glia_1(red), Glia_2(green), Glia_3(blue)"))
lines(x = d1$x, y=d1$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene))* diff(h1$breaks)[1],
col="red1",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d2$x, y=d2$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene))* diff(h2$breaks)[1],
col="green3",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d3$x, y=d3$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene))* diff(h3$breaks)[1],
col="blue3",lwd=3, xlim = c(0,8000), ylim = c(0,400))
legend(x=0,y=400,c("Glia_1","Glia_2","Glia_3"),lty = 1,lwd=2,
col=c("red","green","blue"))
h <- hist(as.numeric(ss2_glia.seur@meta.data$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = T)
h1.1 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.1"))$nGene), breaks = 30,
xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h1.0 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.0"))$nGene), breaks = 30,
xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h2 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_2"))$nGene), breaks = 30,
xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h3 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_3"))$nGene), breaks = 20,
xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
d <- density(as.numeric(ss2_glia.seur@meta.data$nGene))
d1.1 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.1"))$nGene))
d1.0 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.0"))$nGene))
d2 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_2"))$nGene))
d3 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_3"))$nGene))
title(paste0("nGene distribtusion for \nGlia_1.1(red), Glia_1.0(green), Glia_2(blue), Glia_3(purple)"))
lines(x = d1.1$x, y=d1.1$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.1"))$nGene))* diff(h1.1$breaks)[1],
col="#F8766D",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d1.0$x, y=d1.0$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.0"))$nGene))* diff(h1.0$breaks)[1],
col="#7CAE00",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d2$x, y=d2$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_2"))$nGene))* diff(h2$breaks)[1],
col="#00BFC4",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d3$x, y=d3$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_3"))$nGene))* diff(h3$breaks)[1],
col="#C77CFF",lwd=3, xlim = c(0,8000), ylim = c(0,400))
legend(x=0,y=400,c("Glia_1.1","Glia_1.0","Glia_2","Glia_3"),lty = 1,lwd=2,
col=c("#F8766D","#7CAE00","#00BFC4","#C77CFF"))
well not a very clean separation on nGene level, could see green(Glia_1.0) with a peak < 4000
DimPlot(ss2_glia.seur, group.by = 'K300', order = rev(c("Glia_1.1","Glia_1.0","Glia_2","Glia_3"))) +
coord_fixed(0.88) + labs(title="inlocal new")
test with MAST, fit a hurdle model,
for mouse colon atlas, fit Yi with covariate:
Yi : standardzed log2(TP10K+1) expression vector for gene i across cells
X : variable reflecting cell subset membership (e.g. PSNs versus non-PSNs)
A : age associated with each cell (aged versus adult)
C : circadian phase for each cell (evening versus morning)
L : location for each cell (distal versus proximal)
S : sex for each cell (male versus female)
T : mouse model for each cell (Uchl1 versus Sox10)
N: scaled number of genes for each cell (i.e., cell complexity)
in paper, comparisons of different conditions on ss2_glia are not mentioned much,
but they are tested on ss2_neur, so here just do it the same way
to find DEGs between different conditions, used to add covariates, but got quite different results comparing to inpaper's
(ss2_neur, similar - different: Segment > Time > Age)
here just without other covariates except nGene
to find DEGs between sub-clusters, add covariates which would have effect on the comparison to do a regression to decrease the putative effects,
though in the end it turns out that nGene level might match the clusterings anyway,
still doubtful about if nGene distribution is the reason or result or sth. else but somehow related ~
to filter for final unique DEGs between sub-clusters, inpaper calculates the next highest values for each highly-regulated gene
but the results are confused and lack exact cutoffs of parameters to get a very close DEG list which is always a tricky process
maybe it's because DEGs between Glia_2/3 are not that unique, and no previous biological knowledge as standard
so no more discussion, except to set a next highest group to make the result look more confident, maybe
add one more contrast between ss2_Glia_1 and ss2_Glia_2&3 !
add condition contrasts on major group !
(ss2_glia no 'Model' contrast)
# build covariates dataframe
#
ss2_glia.cov <- data.frame(inlocal = factor(as.character(ss2_glia.seur@meta.data$K300)),
inpaper = factor(as.character(ss2_glia.seur@meta.data$Annotation)),
Age = factor(as.character(ss2_glia.seur@meta.data$Age_Processed), levels = c("Old","Young")),
Time = as.character(ss2_glia.seur@meta.data$Time_Processed),
Segment = factor(as.character(ss2_glia.seur@meta.data$Segment_Processed), levels = c("Distal","Proximal")),
Seg_raw = as.character(ss2_glia.seur@meta.data$Segment),
Sex = factor(as.character(ss2_glia.seur@meta.data$Sex_Processed), levels = c("M","F")),
Model = factor(as.character(ss2_glia.seur@meta.data$Model_Processed), levels = c("Uchl1","Sox10")),
nGene = scale(as.numeric(ss2_glia.seur@meta.data$nGene)),
Sample = factor(rownames(ss2_glia.seur@meta.data)))
ss2_glia.cov$Time[ss2_glia.cov$Time=="7AM"] <- "Morning"
ss2_glia.cov$Time[ss2_glia.cov$Time=="7PM"] <- "Evening"
ss2_glia.cov$Time <- factor(ss2_glia.cov$Time, levels = c("Evening","Morning"))
ss2_glia.cov$Seg_raw[ss2_glia.cov$Seg_raw=="1"] <- "Seg1"
ss2_glia.cov$Seg_raw[ss2_glia.cov$Seg_raw=="2"] <- "Seg2"
ss2_glia.cov$Seg_raw[ss2_glia.cov$Seg_raw=="3"] <- "Seg3"
ss2_glia.cov$Seg_raw[ss2_glia.cov$Seg_raw=="4"] <- "Seg4"
ss2_glia.cov$Seg_raw <- factor(ss2_glia.cov$Seg_raw, levels = c("Seg1","Seg2","Seg3","Seg4"))
rownames(ss2_glia.cov) <- as.character(ss2_glia.cov$Sample)
ss2_glia.cov$major <- as.character(ss2_glia.seur@meta.data$inlocal)
ss2_glia.cov$major[grep("Glia_1",ss2_glia.cov$major)] <- "Glia_1"
ss2_glia.cov$major[grep("Glia_2",ss2_glia.cov$major)] <- "Glia_23"
ss2_glia.cov$major[grep("Glia_3",ss2_glia.cov$major)] <- "Glia_23"
ss2_glia.cov$major <- factor(ss2_glia.cov$major)
#ss2_glia.cov
differential analysis with MAST
# inlocal
#ss2_glia.masts_inlocal <- sapply(levels(factor(ss2_glia.cov$inlocal)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$inlocal == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Age","Time","Segment","Sex","Model","Sample")],
# formula='~ nGene + ident + Age + Time + Segment + Sex + Model',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal <- do.call(rbind,ss2_glia.masts_inlocal)
# major
#ss2_glia.masts_inlocal.major <- sapply(levels(factor(ss2_glia.cov$major)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$major == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Age","Time","Segment","Sex","Model","Sample")],
# formula='~ nGene + ident + Age + Time + Segment + Sex + Model',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.major <- do.call(rbind,ss2_glia.masts_inlocal.major)
# conditions
# Age
#ss2_glia.masts_inlocal.Age <- sapply(levels(factor(ss2_glia.cov$Age)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Age == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Age))]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Age <- do.call(rbind,ss2_glia.masts_inlocal.Age)
# Age_Glia1
#ss2_glia.masts_inlocal.Age_Glia1 <- sapply(levels(factor(ss2_glia.cov$Age)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Age == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Age)) & (ss2_glia.cov$major == "Glia_1")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Age_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Age_Glia1)
# Age_Glia23
#ss2_glia.masts_inlocal.Age_Glia23 <- sapply(levels(factor(ss2_glia.cov$Age)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Age == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Age)) & (ss2_glia.cov$major == "Glia_23")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Age_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Age_Glia23)
# Time
#ss2_glia.masts_inlocal.Time <- sapply(levels(factor(ss2_glia.cov$Time)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Time == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Time))]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Time <- do.call(rbind,ss2_glia.masts_inlocal.Time)
# Time_Glia1
#ss2_glia.masts_inlocal.Time_Glia1 <- sapply(levels(factor(ss2_glia.cov$Time)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Time == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Time)) & (ss2_glia.cov$major == "Glia_1")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Time_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Time_Glia1)
# Time_Glia23
#ss2_glia.masts_inlocal.Time_Glia23 <- sapply(levels(factor(ss2_glia.cov$Time)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Time == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Time)) & (ss2_glia.cov$major == "Glia_23")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Time_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Time_Glia23)
# Segment
#ss2_glia.masts_inlocal.Segment <- sapply(levels(factor(ss2_glia.cov$Segment)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Segment == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Segment))]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Segment <- do.call(rbind,ss2_glia.masts_inlocal.Segment)
# Segment_Glia1
#ss2_glia.masts_inlocal.Segment_Glia1 <- sapply(levels(factor(ss2_glia.cov$Segment)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Segment == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Segment)) & (ss2_glia.cov$major == "Glia_1")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Segment_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Segment_Glia1)
# Segment_Glia23
#ss2_glia.masts_inlocal.Segment_Glia23 <- sapply(levels(factor(ss2_glia.cov$Segment)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Segment == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Segment)) & (ss2_glia.cov$major == "Glia_23")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Segment_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Segment_Glia23)
# Seg_raw
#ss2_glia.masts_inlocal.Seg_raw <- sapply(levels(factor(ss2_glia.cov$Seg_raw)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Seg_raw == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Seg_raw))]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Seg_raw <- do.call(rbind,ss2_glia.masts_inlocal.Seg_raw)
# Seg_raw_Glia1
#ss2_glia.masts_inlocal.Seg_raw_Glia1 <- sapply(levels(factor(ss2_glia.cov$Seg_raw)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Seg_raw == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Seg_raw)) & (ss2_glia.cov$major == "Glia_1")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Seg_raw_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Seg_raw_Glia1)
# Seg_raw_Glia23
#ss2_glia.masts_inlocal.Seg_raw_Glia23 <- sapply(levels(factor(ss2_glia.cov$Seg_raw)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Seg_raw == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Seg_raw)) & (ss2_glia.cov$major == "Glia_23")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Seg_raw_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Seg_raw_Glia23)
# Sex
#ss2_glia.masts_inlocal.Sex <- sapply(levels(factor(ss2_glia.cov$Sex)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Sex == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Sex))]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Sex <- do.call(rbind,ss2_glia.masts_inlocal.Sex)
# Sex_Glia1
#ss2_glia.masts_inlocal.Sex_Glia1 <- sapply(levels(factor(ss2_glia.cov$Sex)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Sex == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Sex)) & (ss2_glia.cov$major == "Glia_1")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Sex_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Sex_Glia1)
# Sex_Glia23
#ss2_glia.masts_inlocal.Sex_Glia23 <- sapply(levels(factor(ss2_glia.cov$Sex)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Sex == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Sex)) & (ss2_glia.cov$major == "Glia_23")]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Sex_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Sex_Glia23)
# Model
#ss2_glia.masts_inlocal.Model <- sapply(levels(factor(ss2_glia.cov$Model)), function(ident){
# ident.use = factor(ifelse(ss2_glia.cov$Model == ident, ident, 'Other'), levels=c('Other', ident))
# cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Model))]
# p.find_markers(seur=ss2_glia.seur,
# ident.1 = ident,
# ident.use=ident.use,
# cells.use=cells.use,
# min_alph=.01,
# min_fc=1.2,
# max_cells=2500,
# covariates=ss2_glia.cov[,c("nGene","Sample")],
# formula='~ nGene + ident',
# lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Model <- do.call(rbind,ss2_glia.masts_inlocal.Model)
## save raw output
#write.table(protectgene(ss2_glia.masts_inlocal),"MAST_raw/ss2_glia.masts_inlocal.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.major),"MAST_raw/ss2_glia.masts_inlocal.major.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age),"MAST_raw/ss2_glia.masts_inlocal.Age.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Age_Glia1.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Age_Glia23.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment),"MAST_raw/ss2_glia.masts_inlocal.Segment.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Segment_Glia1.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Segment_Glia23.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw),"MAST_raw/ss2_glia.masts_inlocal.Seg_raw.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Seg_raw_Glia1.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Seg_raw_Glia23.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex),"MAST_raw/ss2_glia.masts_inlocal.Sex.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Sex_Glia1.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Sex_Glia23.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time),"MAST_raw/ss2_glia.masts_inlocal.Time.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Time_Glia1.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Time_Glia23.raw.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
this part would take much time, annotate the code when it's done.
ss2_glia.masts_inlocal <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.major <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.major.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Age <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Age.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Age_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Age_Glia1.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Age_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Age_Glia23.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Segment <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Segment.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Segment_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Segment_Glia1.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Segment_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Segment_Glia23.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Seg_raw <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Seg_raw.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Seg_raw_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Seg_raw_Glia1.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Seg_raw_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Seg_raw_Glia23.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Sex <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Sex.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Sex_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Sex_Glia1.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Sex_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Sex_Glia23.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Time <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Time.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Time_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Time_Glia1.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Time_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Time_Glia23.raw.csv",
header = TRUE, stringsAsFactors = FALSE, sep = ","))
there're a few columns to filter for the final unique DEGs,
for ss2 data, here ignores contam.res (contaminated residuals for droplet-based filtering)
and would only filter with p.adjD<0.05 (coefD > 0 and sorted) to save DEGs' table,
(keep duplicated DEGs only in one group with biggest coefD instead of using 'next highest' things)
if filter others like mean expression (mu, mean), expressed cells ratio(alpha) and log2FC et al. just for repeating figures in paper..
filt1_mast <- function(mast_raw){
mast_filt <- mast_raw %>% filter(padjD<0.05) %>% filter(coefD >0) %>% arrange(desc(coefD))
mast_filt <- mast_filt[!duplicated(mast_filt$gene),]
mast_filt <- mast_filt %>% arrange(contrast,desc(coefD))
return(mast_filt)
}
ss2_glia.masts_inlocal.filt1 <- filt1_mast(ss2_glia.masts_inlocal)
ss2_glia.masts_inlocal.major.filt1 <- filt1_mast(ss2_glia.masts_inlocal.major)
ss2_glia.masts_inlocal.Age.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Age)
ss2_glia.masts_inlocal.Age_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Age_Glia1)
ss2_glia.masts_inlocal.Age_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Age_Glia23)
ss2_glia.masts_inlocal.Segment.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Segment)
ss2_glia.masts_inlocal.Segment_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Segment_Glia1)
ss2_glia.masts_inlocal.Segment_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Segment_Glia23)
ss2_glia.masts_inlocal.Seg_raw.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Seg_raw)
ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Seg_raw_Glia1)
ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Seg_raw_Glia23)
ss2_glia.masts_inlocal.Sex.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Sex)
ss2_glia.masts_inlocal.Sex_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Sex_Glia1)
ss2_glia.masts_inlocal.Sex_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Sex_Glia23)
ss2_glia.masts_inlocal.Time.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Time)
ss2_glia.masts_inlocal.Time_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Time_Glia1)
ss2_glia.masts_inlocal.Time_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Time_Glia23)
#write.table(protectgene(ss2_glia.masts_inlocal.filt1),"MAST_filt1/ss2_glia.masts_inlocal.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.major.filt1),"MAST_filt1/ss2_glia.masts_inlocal.major.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Age.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Age_Glia1.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Age_Glia23.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Segment.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Segment_Glia1.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Segment_Glia23.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Seg_raw.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Sex.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Sex_Glia1.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Sex_Glia23.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Time.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Time_Glia1.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Time_Glia23.filt1.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
several columns in MAST results could be candidate cutoffs to filter for 'significant' DEGs
| Column | Description |
|---|---|
| ident | Cell subset or lineage |
| gene | Gene |
| coefD | Coefficient (discrete term) |
| pvalD | P-value (discrete term) |
| padjD | Adjusted p-value (discrete term) |
| coefC | Coefficient (continuous term) |
| pvalC | P-value (continuous term) |
| padjC | Adjusted p-value (continuous term) |
| mastfc | Fold change estimated by MAST |
| alpha | Fraction of expressing cells |
| ref_alpha | Fraction of expressing cells (reference group) |
| mu | Mean log2(TP10K) in expressing cells |
| ref_mu | Mean log2(TP10K) in expressing cells (reference group) |
| mean | Mean log2(TP10K) in all cells |
| ref_mean | Mean log2(TP10K) in all cells (reference group) |
| log2fc | log2 fold change (relative to reference group) |
| contam.res | Residual (contamination model) |
| Note: | reference group = all other cells (for example, reference group for Tregs = all non-Tregs) |
in paper, comparing to ss2_neur, analysis of ss2_glia is very absent.
maybe it is because glia can not be the key point considering the poor number of subclusters.
(is it born this way ? Or, showed like this for some quality/technique limitations ?)
besides, the highlighted markers: Gfra2, Slc18a2, Ntsr1, don't look good from the start.
FeaturePlot(ss2_glia.seur,c("Gfra2","Slc18a2","Ntsr1"))
multiplot(
easy_violin(seur=ss2_glia.seur,
genes="Gfra2",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,6),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
genes="Slc18a2",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,5),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
genes="Ntsr1",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,3),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
cols = 1)
tested cutoffs using coefD + padjD + additional "next highest" fold change, can't directly get these genes.
for Glia 1, some labelings might could be fine.(like Col5a3, Kcna2, Cadm2, but where's Gfra2 ?)
fig.S2E, Glia 1 labeled genes.
FeaturePlot(ss2_glia.seur,c("Col5a3","Kcna2","Frmd4a","Abca8b","Clvs1","Prnp","Cadm2","Kcnn2",
"Aatk","Olfm2","Fyn","Ntng1","Apba2","Nav1","Kcnk13","Zfyve28"))
for Glia 2/3, hard to say...
ss2_glia clusters.
multiplot(
DimPlot(ss2_glia.seur, group.by = 'Annotation', label = T) + labs(title = "ss2_glia inpaper"),
DimPlot(ss2_glia.seur, group.by = 'inlocal', label = T) + labs(title = "ss2_glia inlocal"),
DimPlot(ss2_glia.seur, group.by = 'K300', label = T) + labs(title = "ss2_glia inlocal new"),
cols=3
)
fig.S2E, Glia 2 labeled genes.
FeaturePlot(ss2_glia.seur,c("Slc18a2","Mapk10","Fam184b","Trim9","Lsamp","Erc2","Auts2","Cpe",
"Sobp","Dio2","Ank2","Ppp2r2c","Kcnc4","Kcnab2","Ctnnd2"))
fig.S2E, Glia 3 labeled genes.
(Dkk3/Olfml3/Clu/Ncan/Omg/Ntsr1 look like that they have some pattern)
FeaturePlot(ss2_glia.seur,c("Bcan","Dkk3","Tspan18","Olfml3","Fam184b","Lsamp","Clu","Trim9",
"Mest","Ncan","Omg","Ntsr1","Auts2","Cdh2"))
multiplot(
easy_violin(seur=ss2_glia.seur,
genes="Dkk3",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,5),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
genes="Olfml3",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,5),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
genes="Clu",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,4),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
genes="Ncan",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,4),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
genes="Omg",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,4),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
genes="Ntsr1",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,4),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
cols = 1)
tested "mu","mean","foldchange","n","alpha", still confused about how to exactly do it.
according to the nGene distribution upstairs,
may think "n" or "alpha" which represents "expressed cells (ratio)" could have more weights than only using expressing levels.
finally just use quantile 95% of "diff_alpha = alpha - ref_alpha" as cutoff.
# min - max of diff_alpha
range(ss2_glia.masts_inlocal.filt1$alpha - ss2_glia.masts_inlocal.filt1$ref_alpha)
## [1] -0.1274755 0.4270345
# quantile with step 0.05
test_quant <- data.frame(quantile(ss2_glia.masts_inlocal.filt1$alpha - ss2_glia.masts_inlocal.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant) <- "alpha-ref_alpha"
test_quant
## alpha-ref_alpha
## 0% -0.127475491
## 5% -0.052852965
## 10% -0.032787456
## 15% -0.015386494
## 20% 0.001583513
## 25% 0.012554546
## 30% 0.021046425
## 35% 0.031396877
## 40% 0.043049713
## 45% 0.053028822
## 50% 0.062352190
## 55% 0.073282255
## 60% 0.083200957
## 65% 0.092046598
## 70% 0.101332530
## 75% 0.113223128
## 80% 0.123605688
## 85% 0.135671889
## 90% 0.152707289
## 95% 0.182411398
## 100% 0.427034513
choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.filt1$alpha - ss2_glia.masts_inlocal.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.45))
text(x=0:20*1.2+0.65,y=test_quant[,1]+0.01*test_quant[,1]/abs(test_quant[,1]),round(test_quant[,1],3),cex=0.8)
filt2
modification from 'future check of ss2_neur':
if one gene expressed in most cells, or with similar ratio in all groups,
but uniquely high in one(or two), this 'alpha - ref_alpha' filtering couldn't find them.
so filt2 add condition:
or "genes with alpha > 'quantile 20%' and 'mu - ref_mu' > 'quantile 95%'"
filt2_mast <- function(mast_raw){
mast_filt <- mast_raw %>% filter(padjD<0.05) %>% filter(coefD >0) %>% arrange(desc(coefD))
mast_filt <- mast_filt[!duplicated(mast_filt$gene),]
mast_filt <- mast_filt %>% arrange(contrast,desc(coefD))
mast_filt$diff_alpha <- mast_filt$alpha - mast_filt$ref_alpha
mast_filt$diff_mu <- mast_filt$mu - mast_filt$ref_mu
mast_filt <- mast_filt %>% filter(diff_alpha> as.numeric((quantile(mast_filt$diff_alpha,
probs = seq(0,1,0.05)))["95%"]) |
(diff_mu > as.numeric((quantile(mast_filt$diff_mu,
probs = seq(0,1,0.05)))["95%"]) &
alpha > as.numeric((quantile(mast_filt$alpha,
probs = seq(0,1,0.05)))["20%"])))
return(mast_filt)
}
ss2_glia.masts_inlocal.filt2 <- filt2_mast(ss2_glia.masts_inlocal)
ss2_glia.masts_inlocal.major.filt2 <- filt2_mast(ss2_glia.masts_inlocal.major)
ss2_glia.masts_inlocal.Age.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Age)
ss2_glia.masts_inlocal.Age_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Age_Glia1)
ss2_glia.masts_inlocal.Age_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Age_Glia23)
ss2_glia.masts_inlocal.Segment.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Segment)
ss2_glia.masts_inlocal.Segment_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Segment_Glia1)
ss2_glia.masts_inlocal.Segment_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Segment_Glia23)
ss2_glia.masts_inlocal.Seg_raw.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Seg_raw)
ss2_glia.masts_inlocal.Seg_raw_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Seg_raw_Glia1)
ss2_glia.masts_inlocal.Seg_raw_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Seg_raw_Glia23)
ss2_glia.masts_inlocal.Sex.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Sex)
ss2_glia.masts_inlocal.Sex_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Sex_Glia1)
ss2_glia.masts_inlocal.Sex_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Sex_Glia23)
ss2_glia.masts_inlocal.Time.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Time)
ss2_glia.masts_inlocal.Time_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Time_Glia1)
ss2_glia.masts_inlocal.Time_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Time_Glia23)
#write.table(protectgene(ss2_glia.masts_inlocal.filt2),"MAST_filt2/ss2_glia.masts_inlocal.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.major.filt2),"MAST_filt2/ss2_glia.masts_inlocal.major.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Age.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Age_Glia1.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Age_Glia23.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Segment.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Segment_Glia1.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Segment_Glia23.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Seg_raw.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Seg_raw_Glia1.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Seg_raw_Glia23.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Sex.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Sex_Glia1.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Sex_Glia23.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Time.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Time_Glia1.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Time_Glia23.filt2.csv",
# col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
how many DEGs in .filt2
dim(ss2_glia.masts_inlocal.filt2)[1]
## [1] 285
ss2_glia.masts_inlocal.filt2[1:5,]
## gene contrast coefD pvalD coefC pvalC mastfc
## 1 Insc identGlia_1.0 1.476740 4.673997e-49 0.5382472 1.112119e-10 0.6157598
## 2 Col5a3 identGlia_1.0 1.314662 2.607490e-36 0.4858169 5.523741e-12 0.7822629
## 3 Lcp2 identGlia_1.0 1.177220 1.264616e-26 0.2781701 6.778089e-03 0.1222934
## 4 Cspg4 identGlia_1.0 1.163933 1.941090e-28 0.1332955 1.711788e-01 0.4288304
## 5 Igsf21 identGlia_1.0 1.121864 4.956467e-25 0.5641030 1.640965e-14 0.4847441
## pvalH padjD padjC padjH ref n ref_n
## 1 7.968775e-57 2.101429e-45 1.063848e-08 8.956903e-54 Glia_1.0- 392 812
## 2 2.014134e-45 5.861637e-33 6.898538e-10 8.232316e-43 Glia_1.0- 424 1111
## 3 4.369558e-27 1.137143e-23 8.764858e-02 8.541536e-25 Glia_1.0- 208 353
## 4 1.063528e-27 2.181786e-25 4.535179e-01 2.173465e-25 Glia_1.0- 229 416
## 5 1.039548e-36 3.714046e-22 2.544062e-12 3.338436e-34 Glia_1.0- 439 1339
## alpha ref_alpha mu ref_mu mean ref_mean total
## 1 0.7153285 0.3533507 2.404424 1.945548 1.9211019 0.4447211 0.3988714
## 2 0.7737226 0.4834639 2.486427 2.094925 2.1163156 1.0464054 0.3336074
## 3 0.3795620 0.1536118 1.566902 1.247483 0.1693096 -1.4551561 0.4237189
## 4 0.4178832 0.1810270 1.704606 1.704008 0.4457773 -0.7617153 0.3551336
## 5 0.8010949 0.5826806 2.311550 1.554877 1.9915947 0.7756543 0.3564757
## ref_total log2fc ident diff_alpha diff_mu
## 1 0.6011286 1.476381 Glia_1.0 0.3619777 0.4588758345
## 2 0.6663926 1.069910 Glia_1.0 0.2902587 0.3915018136
## 3 0.5762811 1.624466 Glia_1.0 0.2259502 0.3194193922
## 4 0.6448664 1.207493 Glia_1.0 0.2368562 0.0005976068
## 5 0.6435243 1.215940 Glia_1.0 0.2184143 0.7566724910
plot(data.frame(coefD=(ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_1.0"))$coefD,
logP=-log10((ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_1.0"))$padjD)),
pch=16,col="grey",
main="Glia_1.0", xlab="DE coefficient (Glia_1.0 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.12,y=2.5,"padjD < 0.05",cex=0.75)
points(data.frame(coefD=(ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_1.0"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_1.0"))$padjD)),
pch=16,col="lightblue")
points(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$padjD)),
pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$coefD+0.05,
logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$padjD)+1),
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$gene,cex=0.6)
legend(x=0,y=45,c("raw","filt1","filt2"),cex=1,
col=c("grey","lightblue","darkred"),pch=c(16,16,16))
plot(data.frame(coefD=(ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_1.1"))$coefD,
logP=-log10((ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_1.1"))$padjD)),
pch=16,col="grey",
main="Glia_1.1", xlab="DE coefficient (Glia_1.1 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.12,y=2.5,"padjD < 0.05",cex=0.75)
points(data.frame(coefD=(ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_1.1"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_1.1"))$padjD)),
pch=16,col="lightblue")
points(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$padjD)),
pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$coefD+0.05,
logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$padjD)+1),
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$gene,cex=0.6)
legend(x=0,y=56,c("raw","filt1","filt2"),cex=1,
col=c("grey","lightblue","darkred"),pch=c(16,16,16))
plot(data.frame(coefD=(ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_2"))$coefD,
logP=-log10((ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_2"))$padjD)),
pch=16,col="grey",
main="Glia_2", xlab="DE coefficient (Glia_2 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.03,y=1.8,"padjD < 0.05",cex=0.75)
points(data.frame(coefD=(ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_2"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_2"))$padjD)),
pch=16,col="lightblue")
points(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$padjD)),
pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$coefD+0.05,
logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$padjD)+0.5),
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$gene,cex=0.6)
legend(x=0,y=25,c("raw","filt1","filt2"),cex=1,
col=c("grey","lightblue","darkred"),pch=c(16,16,16))
plot(data.frame(coefD=(ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_3"))$coefD,
logP=-log10((ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_3"))$padjD)),
pch=16,col="grey",
main="Glia_3", xlab="DE coefficient (Glia_3 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.12,y=2.5,"padjD < 0.05",cex=0.75)
points(data.frame(coefD=(ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_3"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_3"))$padjD)),
pch=16,col="lightblue")
points(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$padjD)),
pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$coefD+0.05,
logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$padjD)+1),
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$gene,cex=0.6)
legend(x=0,y=52,c("raw","filt1","filt2"),cex=1,
col=c("grey","lightblue","darkred"),pch=c(16,16,16))
as displayed upstairs, filt2 filtering for 'diff_alpha' basically get genes with top '-log10(padjD)' which would make sense logically.
however, if it's 'really significant' ? I have no idea. could only say 'putative' or 'candidate'. next check these DEGs.
clusters
multiplot(
DimPlot(ss2_glia.seur, group.by = 'Annotation', label = T) + labs(title = "ss2_glia inpaper"),
DimPlot(ss2_glia.seur, group.by = 'inlocal', label = T) + labs(title = "ss2_glia inlocal"),
DimPlot(ss2_glia.seur, group.by = 'K300', label = T) + labs(title = "ss2_glia inlocal new"),
cols=3
)
left top group
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = ss2_glia.seur$K300=="Glia_1.1", cols = "#7CAE00",
pt.size = 0.5,label = T) +
labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1") %>% arrange(padjD))$gene[1:16])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1") %>% arrange(padjD))$gene[17:32])
left down group
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = ss2_glia.seur$K300=="Glia_1.0", cols = "#F8766D",
pt.size = 0.5,label = T) +
labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0") %>% arrange(padjD))$gene[1:16])
right outside group
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = ss2_glia.seur$K300=="Glia_2", cols = "#00BFC4", pt.size = 0.5,label = T) +
labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2") %>% arrange(padjD))$gene[1:16])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2") %>% arrange(padjD))$gene[17:32])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2") %>% arrange(padjD))$gene[33:48])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2") %>% arrange(padjD))$gene[49:64],ncol = 4)
right inside group
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = ss2_glia.seur$K300=="Glia_3", cols = "#C77CFF",
pt.size = 0.5,label = T) +
labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[1:16])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[17:32])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[33:48])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[49:64])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[65:80])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[81:96])
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[97:112])
comparing to ss2_neur, in spite of similar cell number, not many DEGs could be highly unique.
it could be possible here glia in ENS have dynamic stages rather than distant subgroups.
these filt2 DEGs just show genes that might be more significant than other unlabeled,
but if there's one gene with alpha=15% and ref_alpha=0 which is more like a unique identifier, it wouldn't be here,
test that situation:
quantile(ss2_glia.masts_inlocal.filt1$ref_alpha, probs = seq(0,1,0.05))
## 0% 5% 10% 15% 20% 25%
## 0.0004191115 0.0291829760 0.0591208726 0.0854779492 0.1131015404 0.1369938686
## 30% 35% 40% 45% 50% 55%
## 0.1594912000 0.1917808000 0.2251219929 0.2592955000 0.2931756602 0.3359116000
## 60% 65% 70% 75% 80% 85%
## 0.3746736292 0.4182505673 0.4702905994 0.5263103280 0.5891329584 0.6595216693
## 90% 95% 100%
## 0.7386141535 0.8518021794 0.9974853311
choose 'ref_alpha=0.06' < 'quantile 10%' as cutoff, all genes with '0.06 < alpha - ref_alpha < 0.182':
('>0.182' already found)
(ss2_glia.masts_inlocal.filt1 %>% filter(ref_alpha<0.06 & ((alpha - 0.06) > ref_alpha) & (alpha - ref_alpha)<0.182)
)[,c("gene","contrast","coefD","n","alpha","ref_alpha","mu","ref_mu")]
## gene contrast coefD n alpha ref_alpha mu
## 1 Artn identGlia_1.0 1.7443915 57 0.10401460 0.02132289 1.5778663
## 2 Emx2os identGlia_1.0 0.9296810 72 0.13138686 0.05961706 -3.8570291
## 3 Cdc42ep3 identGlia_1.0 0.8560334 71 0.12956204 0.05918190 1.4756956
## 4 Cxcl12 identGlia_1.1 1.8005549 52 0.11304348 0.02095557 2.9995314
## 5 Marveld1 identGlia_1.1 1.7675631 51 0.11086957 0.02430847 1.9330923
## 6 Mal identGlia_1.1 1.7492440 83 0.18043478 0.03939648 1.9919836
## 7 Prss23 identGlia_1.1 1.6120380 64 0.13913043 0.03310981 2.8597595
## 8 Flrt3 identGlia_1.1 1.5226250 42 0.09130435 0.02849958 1.5014182
## 9 Apcdd1 identGlia_1.1 1.4689436 52 0.11304348 0.03227158 1.9511701
## 10 Aldh1a1 identGlia_1.1 1.4623211 64 0.13913043 0.05196982 1.7287784
## 11 Bhlhe40 identGlia_1.1 1.4100360 89 0.19347826 0.05867561 2.3615038
## 12 Kcnc4 identGlia_2 0.8833450 134 0.12934363 0.04143646 0.6335837
## 13 Scube3 identGlia_2 0.6773048 129 0.12451737 0.05303867 -0.1776031
## 14 P2rx6 identGlia_2 0.5770731 126 0.12162162 0.05801105 1.0917545
## 15 Dio2 identGlia_2 0.4275244 159 0.15347490 0.05966851 0.9981773
## 16 Gm6568 identGlia_3 1.0738720 108 0.13466330 0.04207436 -1.8670250
## 17 Pcdhb9 identGlia_3 1.0718440 122 0.15211970 0.05724070 2.4847540
## 18 Otor identGlia_3 1.0037820 140 0.17456360 0.05283757 2.4753690
## 19 Omg identGlia_3 0.9251344 122 0.15211970 0.05234834 1.4002720
## 20 Smpd1 identGlia_3 0.9187469 100 0.12468830 0.04990215 1.5836070
## 21 6330403K07Rik identGlia_3 0.6456049 89 0.11097260 0.04647750 1.4924780
## 22 Wnt11 identGlia_3 0.6386678 97 0.12094760 0.05675147 0.8139619
## ref_mu
## 1 1.4372609
## 2 -3.5065562
## 3 1.6565969
## 4 1.4024947
## 5 1.3296913
## 6 1.1450324
## 7 1.7718143
## 8 0.8600446
## 9 0.1580686
## 10 -0.4238265
## 11 1.3402953
## 12 -0.3412070
## 13 0.2276300
## 14 0.6835485
## 15 0.9536753
## 16 -1.3687070
## 17 2.2399820
## 18 1.4570760
## 19 0.8250343
## 20 1.7753680
## 21 1.2886020
## 22 0.7761727
could see Glia_1.0 finally gets a relatively better one, 'Artn',
and Glia_1.1 gets a familiar gene, 'Aldh1a1', which is investigated by Christian Wolfrum Lab in
"snRNA-seq reveals a subpopulation of adipocytes that regulates thermogenesis",
this paper was published less than two months later
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.filt1 %>% filter(ref_alpha<0.06 & ((alpha - 0.06) > ref_alpha) & (alpha - ref_alpha)<0.182))[,c("gene","contrast","coefD","n","alpha","ref_alpha")]$gene)
multiplot(
easy_violin(seur=ss2_glia.seur,
genes="Artn",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,4),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
genes="Aldh1a1",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,4),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
genes="Omg",
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.seur@meta.data[,"K300"],
cells=rownames(ss2_glia.seur@meta.data),
do.facet=FALSE,
facet_by = NULL,
ylim=c(0,4),
color.fill = glia.colors.inlocal,
color.point = glia.colorpoint.inlocal),
cols = 1)
how about UMAP result comparing to tSNE ?
and what does the putative cell trajectory look like ?
#ss2_glia.seur_traj <- ss2_glia.seur
#ss2_glia.seur_traj <- RunUMAP(ss2_glia.seur_traj, dims=1:9)
#saveRDS(ss2_glia.seur_traj,"ss2_glia.seur_traj.rds")
ss2_glia.seur_traj <- readRDS("ss2_glia.seur_traj.rds")
multiplot(
DimPlot(ss2_glia.seur_traj,reduction = 'tsne', group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur_traj,reduction = 'umap', group.by = "K300",label = T) + labs(title = "ss2_glia inlocal new"),
cols = 1)
visually, UMAP result gives a clearer understanding of the difference between clusters.
ref. https://broadinstitute.github.io/2020_scWorkshop/trajectory-inference.html
diffusion_test <- DiffusionMap(t(data.frame(ss2_glia.seur_traj@assays[['RNA']]@data))[,ss2_glia.masts_inlocal.filt2$gene])
diffusion_testpca <- DiffusionMap(data.frame(ss2_glia.seur_traj@reductions[['pca']]@cell.embeddings)[,1:9])
diffusion_tmp <- data.frame(DC1 = eigenvectors(diffusion_test)[,1],
DC2 = eigenvectors(diffusion_test)[,2],
label = ss2_glia.seur_traj$K300)
head(diffusion_tmp)
## DC1 DC2 label
## M1_S1.S364 -0.0170155406 -0.0040727896 Glia_3
## M1_S1.S291 0.0268621785 -0.0137390705 Glia_1.1
## M1_S1.S350 0.0395379687 0.0004830339 Glia_1.1
## M1_S1.S352 -0.0092287037 0.0228178496 Glia_2
## M1_S1.S328 -0.0134275963 0.0133168603 Glia_3
## M1_S1.S327 0.0001965577 0.0156179767 Glia_2
diffusion_pca <- data.frame(DC1 = eigenvectors(diffusion_testpca)[,1],
DC2 = eigenvectors(diffusion_testpca)[,2],
label = ss2_glia.seur_traj$K300)
head(diffusion_pca)
## DC1 DC2 label
## M1_S1.S364 0.015105606 -0.01506936 Glia_3
## M1_S1.S291 -0.025148810 0.01540450 Glia_1.1
## M1_S1.S350 -0.032110815 -0.01779140 Glia_1.1
## M1_S1.S352 0.015109798 -0.01287728 Glia_2
## M1_S1.S328 0.014539857 -0.01412120 Glia_3
## M1_S1.S327 0.008757453 0.01557597 Glia_2
multiplot(
ggplot(diffusion_tmp, aes(x = DC1, y = DC2, colour = label)) +
geom_point(cex=1.0) +
scale_color_manual(values=c("#F8766D","#7CAE00","#00BFC4","#C77CFF")) +
#ggthemes::scale_color_tableau() +
xlab("Diffusion component 1") +
ylab("Diffusion component 2") +
theme_classic() + labs(title=("DM with DEGs")),
ggplot(diffusion_pca, aes(x = DC1, y = DC2, colour = label)) +
geom_point(cex=1.0) +
scale_color_manual(values=c("#F8766D","#7CAE00","#00BFC4","#C77CFF")) +
#ggthemes::scale_color_tableau() +
xlab("Diffusion component 1") +
ylab("Diffusion component 2") +
theme_classic() + labs(title=("DM with PCs")),
cols = 1)
ref. https://bustools.github.io/BUS_notebooks_R/slingshot.html
sds_umap <- slingshot(Embeddings(ss2_glia.seur_traj, "umap"), clusterLabels = ss2_glia.seur_traj$K300)
## Using full covariance matrix
sds_umap
## class: SlingshotDataSet
##
## Samples Dimensions
## 3039 2
##
## lineages: 1
## Lineage1: Glia_3 Glia_2 Glia_1.0 Glia_1.1
##
## curves: 1
## Curve1: Length: 23.846 Samples: 3039
# as people described, to plot Slingshot result, may need to give each cell a color
sds_colors <- ss2_glia.seur_traj$K300
clust_colors <- c("#7CAE00","#F8766D","#00BFC4","#C77CFF")
clust_names <- paste0("Glia_",c("1.1","1.0","2","3"))
for(i in 1:length(clust_colors)){
sds_colors[sds_colors==clust_names[i]] <- clust_colors[i]
}
plot(reducedDims(sds_umap), col = sds_colors, pch=16, cex = 0.35, asp = 1.25,
ylim=c(-5,6),main="Slingshot on UMAP")
lines(sds_umap, lwd=2, type = 'lineages', col = 'black')
legend(x=-12,y=6,clust_names,cex=0.65,
col=clust_colors,pch=c(16,16,16))
seems the trajectory is like Glia_1.1 <--> Glia_1.0 <--> Glia_2 <--> Glia_3.
well, no direction yet,
here just take a look, all those methods are doing the same thing: Dimension Reduction,
which one is better has been discussed a lot,
just as https://broadinstitute.github.io/2020_scWorkshop/trajectory-inference.html,
those google slides make a good introduction.
additionally check pseudotime in monocle3.
ref. https://cole-trapnell-lab.github.io/monocle3/docs/trajectories/#learn-graph
# perform seruat v3 object to monocle v3 trajectory analysis
# ref. https://github.com/satijalab/seurat/issues/1658
gene_annt <- as.data.frame(rownames(ss2_glia.seur_traj@reductions[['pca']]@feature.loadings),
row.names = rownames(ss2_glia.seur_traj@reductions[['pca']]@feature.loadings))
colnames(gene_annt) <- "gene_short_name"
# genes in @feature.loadings is the seurat used variable genes !
# so here just use those about 1500 features instead of whole >10k genes.
cell_meta <- as.data.frame(ss2_glia.seur_traj@meta.data)
new_mtx <- ss2_glia.seur_traj@assays[['RNA']]@counts
new_mtx <- new_mtx[rownames(ss2_glia.seur_traj@reductions[['pca']]@feature.loadings),]
cds <- new_cell_data_set(new_mtx,
cell_metadata = cell_meta,
gene_metadata = gene_annt)
recreate.partition <- c(rep(1,length(cds@colData@rownames)))
#recreate.partition <- ss2_glia.seur_traj@meta.data$K300
names(recreate.partition) <- cds@colData@rownames
recreate.partition <- as.factor(recreate.partition)
cds@clusters@listData[['UMAP']][['partitions']] <- recreate.partition
#cds@clusters@listData[['tSNE']][['partitions']] <- recreate.partition
list_clusters <- ss2_glia.seur_traj@meta.data$K300
names(list_clusters) <- rownames(ss2_glia.seur_traj@meta.data)
cds@clusters@listData[['UMAP']][['clusters']] <- list_clusters
#cds@clusters@listData[['tSNE']][['clusters']] <- list_clusters
cds@clusters@listData[['UMAP']][['louvarin_res']] <- "NA"
#cds@clusters@listData[['tSNE']][['louvarin_res']] <- "NA"
cds@int_colData@listData[['reducedDims']][["UMAP"]] <- ss2_glia.seur_traj@reductions[['umap']]@cell.embeddings
#cds@int_colData@listData[['reducedDims']][["tSNE"]] <- ss2_glia.seur_traj@reductions[['tsne']]@cell.embeddings
cds@preprocess_aux$gene_loadings <- ss2_glia.seur_traj@reductions[["pca"]]@feature.loadings
# UMAP from Seurat v3 to Monocle v3 done.
cds <- learn_graph(cds)
##
|
| | 0%
|
|======================================================================| 100%
similar to DM, but more branches making it complex.
root_cells <- rownames(ss2_glia.seur_traj@meta.data %>% filter(K300 == "Glia_1.1") )[
order((ss2_glia.seur_traj@meta.data %>% filter(K300 == "Glia_1.1"))$nGene)[1:10]]
cds <- order_cells(cds,root_cells = root_cells )
manually choose 10 cells from Glia_1.1 with the smallest nGene values as 'root_cells',
so that there are directions starting in color 0:'dark purple'.
DimPlot(ss2_glia.seur_traj, group.by = 'nGene', reduction = 'umap',cols = material.heat(1909)) +
labs(title = "nGene distribution") +
guides(colour = guide_coloursteps(show.limits = TRUE,label=FALSE))
this pseudotime is highly related to nGene map,
maybe ss2 expression level normalized by 'CPM' is naturally with 'nGene pattern',
making this result nonsense. hope the following 10x data by 'UMI count' would provide support.
according to analysis above, can't help to think if there are only two major groups, next check 'Glia_1 vs Glia_2/3'
nuclei info
table(ss2_glia.cov$major)
##
## Glia_1 Glia_23
## 1074 1965
# quantile with step 0.05
test_quant.major <- data.frame(quantile(ss2_glia.masts_inlocal.major.filt1$alpha - ss2_glia.masts_inlocal.major.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.major) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.major.filt1$alpha - ss2_glia.masts_inlocal.major.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.6))
text(x=0:20*1.2+0.65,y=test_quant.major[,1]+0.02*test_quant.major[,1]/abs(test_quant.major[,1]),round(test_quant.major[,1],3),cex=0.7)
plot(data.frame(coefD=(ss2_glia.masts_inlocal.major %>% filter(coefD > 0 & contrast=="identGlia_1"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.major %>% filter(coefD > 0 & contrast=="identGlia_1"))$padjD)),
pch=16,col="grey",
main="Glia_1", xlab="DE coefficient (Glia_1 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.05,y=2.5,"padjD < 0.05",cex=0.65)
points(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt1 %>% filter(contrast=="identGlia_1"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.major.filt1 %>% filter(contrast=="identGlia_1"))$padjD)),
pch=16,col="lightblue")
points(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$padjD)),
pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$coefD+0.1,
logP=-log10((ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1" ))$padjD)+0.8),
(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$gene,cex=0.6)
legend(x=0,y=95,c("raw","filt1","filt2"),cex=1,
col=c("grey","lightblue","darkred"),pch=c(16,16,16))
plot(data.frame(coefD=(ss2_glia.masts_inlocal.major %>% filter(coefD > 0 & contrast=="identGlia_23"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.major %>% filter(coefD > 0 & contrast=="identGlia_23"))$padjD)),
pch=16,col="grey",
main="Glia_23", xlab="DE coefficient (Glia_23 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.05,y=2.5,"padjD < 0.05",cex=0.65)
points(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt1 %>% filter(contrast=="identGlia_23"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.major.filt1 %>% filter(contrast=="identGlia_23"))$padjD)),
pch=16,col="lightblue")
points(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$coefD,
logP=-log10((ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$padjD)),
pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$coefD+0.06,
logP=-log10((ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$padjD)+1.5),
(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23" ))$gene,cex=0.6)
legend(x=0,y=120,c("raw","filt1","filt2"),cex=1,
col=c("grey","lightblue","darkred"),pch=c(16,16,16))
left group
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = grepl("Glia_1",ss2_glia.seur$K300), cols = c("#7CAE00","#F8766D"),
pt.size = 0.5,label = T) +
labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)
(sorted by coefD, left -> right, up -> down)
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1" ))$gene[1:83], ncol = 4)
(sorted by coefD, up -> down, left -> right)
multiplot(
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1" ))$gene[1:20],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$gene[21:40],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$gene[41:60],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$gene[61:80],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6)),
cols = 4)
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1" ))$gene[81:83],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6))
right group
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = grepl("Glia_2|Glia_3",ss2_glia.seur$K300), cols = c("#00BFC4","#C77CFF"),
pt.size = 0.5,label = T) +
labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)
(sorted by coefD, left -> right, up -> down)
FeaturePlot(ss2_glia.seur,
(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23" ))$gene[1:165],ncol = 4)
(sorted by coefD, up -> down, left -> right)
multiplot(
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23" ))$gene[1:33],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$gene[34:66],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$gene[67:99],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$gene[100:132],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$gene[133:165],
data=ss2_glia.seur@assays[['RNA']]@data,
groups=ss2_glia.cov$major,
cells=rownames(ss2_glia.seur@meta.data),
ylim=c(0,6)),
cols = 5)
nuclei with Age info
table(ss2_glia.seur@meta.data$Age_Processed)
##
## Old Young
## 1074 1964
# quantile with step 0.05
test_quant.Age <- data.frame(quantile(ss2_glia.masts_inlocal.Age.filt1$alpha - ss2_glia.masts_inlocal.Age.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Age) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Age.filt1$alpha - ss2_glia.masts_inlocal.Age.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.45))
text(x=0:20*1.2+0.65,y=test_quant.Age[,1]+0.02*test_quant.Age[,1]/abs(test_quant.Age[,1]),round(test_quant.Age[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Age_Processed',label = F) + labs(title = "ss2_glia Age"),
cols = 1)
(ss2_glia.masts_inlocal.Age.filt2 %>% filter(contrast=="identYoung"))$gene
## [1] "Srsf2" "9230102K24Rik" "Mvp" "Plac9b"
## [5] "Scn3a" "Slc34a2" "Tmem181c-ps" "Spag5"
## [9] "Paqr6" "Tmem254b" "Igf2r" "Utp11l"
## [13] "Wdr6" "Taf6" "Dtx3" "Cd320"
## [17] "Srsf6" "Zdhhc1" "Dalrd3" "Hsf5"
## [21] "Ccdc88c" "Gdi1" "Nrbp2" "Ubn1"
## [25] "Iqcg" "Meg3" "Brd9" "Col3a1"
## [29] "1810046K07Rik" "2410018M08Rik" "Pnldc1" "Dnaaf3"
## [33] "Cep55" "Tnni3" "Ttll6"
nuclei with Age info
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Age_Processed)
##
## Old Young
## 404 670
# quantile with step 0.05
test_quant.Age <- data.frame(quantile(ss2_glia.masts_inlocal.Age_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Age_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Age) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Age_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Age_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.45))
text(x=0:20*1.2+0.65,y=test_quant.Age[,1]+0.02*test_quant.Age[,1]/abs(test_quant.Age[,1]),round(test_quant.Age[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Age_Processed',label = F, pt.size = 0.55,
cells = grep("Glia_1",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Age_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
nuclei with Age info
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Age_Processed)
##
## Old Young
## 670 1294
# quantile with step 0.05
test_quant.Age <- data.frame(quantile(ss2_glia.masts_inlocal.Age_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Age_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Age) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Age_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Age_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.3))
text(x=0:20*1.2+0.65,y=test_quant.Age[,1]+0.01*test_quant.Age[,1]/abs(test_quant.Age[,1]),round(test_quant.Age[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(rep("grey",2),glia.colors.inlocal[3:4])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Age_Processed',label = F, pt.size = 0.55,
cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Age_Glia2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
nuclei with Model info
table(ss2_glia.seur@meta.data$Model_Processed)
##
## Sox10 Uchl1
## 3018 15
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Model',label = F) + labs(title = "ss2_glia Model") +
coord_fixed(ratio=0.94),
cols = 1)
nuclei with Segment info
table(ss2_glia.seur@meta.data$Segment_Processed)
##
## Distal Proximal
## 1266 1585
# quantile with step 0.05
test_quant.Segment <- data.frame(quantile(ss2_glia.masts_inlocal.Segment.filt1$alpha - ss2_glia.masts_inlocal.Segment.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Segment) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Segment.filt1$alpha - ss2_glia.masts_inlocal.Segment.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.6))
text(x=0:20*1.2+0.65,y=test_quant.Segment[,1]+0.02*test_quant.Segment[,1]/abs(test_quant.Segment[,1]),round(test_quant.Segment[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment_Processed',label = F) + labs(title = "ss2_glia Segment"),
cols = 1)
nuclei with Segment info
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Segment_Processed)
##
## Distal Proximal
## 496 516
# quantile with step 0.05
test_quant.Segment <- data.frame(quantile(ss2_glia.masts_inlocal.Segment_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Segment_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Segment) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Segment_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Segment_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.6))
text(x=0:20*1.2+0.65,y=test_quant.Segment[,1]+0.02*test_quant.Segment[,1]/abs(test_quant.Segment[,1]),round(test_quant.Segment[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment_Processed',label = F, pt.size = 0.55,
cells = grep("Glia_1",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Segment_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
nuclei with Segment info
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Segment_Processed)
##
## Distal Proximal
## 770 1069
# quantile with step 0.05
test_quant.Segment <- data.frame(quantile(ss2_glia.masts_inlocal.Segment_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Segment_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Segment) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Segment_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Segment_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.6))
text(x=0:20*1.2+0.65,y=test_quant.Segment[,1]+0.02*test_quant.Segment[,1]/abs(test_quant.Segment[,1]),round(test_quant.Segment[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(rep("grey",2),glia.colors.inlocal[3:4])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment_Processed',label = F, pt.size = 0.55,
cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Segment_Glia2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
nuclei with Seg_raw info ('All' excluded)
table(ss2_glia.seur@meta.data$Segment)
##
## 1 2 3 4 All
## 909 676 567 699 187
why calculate Seg1/2/3/4 separately while already have Distal/Proximal ?
used to try different parameters to repeat DEG results in paper,
for Segment part, still not sure how they did it, here just list results in both ways.
# quantile with step 0.05
test_quant.Seg_raw <- data.frame(quantile(ss2_glia.masts_inlocal.Seg_raw.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Seg_raw) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Seg_raw.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.55))
text(x=0:20*1.2+0.65,y=test_quant.Seg_raw[,1]+0.02*test_quant.Seg_raw[,1]/abs(test_quant.Seg_raw[,1]),round(test_quant.Seg_raw[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment',label = F) + labs(title = "ss2_glia Seg_raw") +
coord_fixed(ratio=0.95),
cols = 1)
nuclei with Seg_raw info ('All' excluded)
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Segment)
##
## 1 2 3 4 All
## 254 262 219 277 62
# quantile with step 0.05
test_quant.Seg_raw <- data.frame(quantile(ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Seg_raw) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.55))
text(x=0:20*1.2+0.65,y=test_quant.Seg_raw[,1]+0.02*test_quant.Seg_raw[,1]/abs(test_quant.Seg_raw[,1]),
round(test_quant.Seg_raw[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment',label = F, pt.size = 0.55,
cells = grep("Glia_1",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Seg_raw_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
nuclei with Seg_raw info ('All' excluded)
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Segment)
##
## 1 2 3 4 All
## 655 414 348 422 125
# quantile with step 0.05
test_quant.Seg_raw <- data.frame(quantile(ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Seg_raw) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.55))
text(x=0:20*1.2+0.65,y=test_quant.Seg_raw[,1]+0.02*test_quant.Seg_raw[,1]/abs(test_quant.Seg_raw[,1]),
round(test_quant.Seg_raw[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(rep("grey",2),glia.colors.inlocal[1:2])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment',label = F, pt.size = 0.55,
cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Seg_raw_Glia2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
nuclei with Sex info
table(ss2_glia.seur@meta.data$Sex_Processed)
##
## F M
## 1081 1957
# quantile with step 0.05
test_quant.Sex <- data.frame(quantile(ss2_glia.masts_inlocal.Sex.filt1$alpha - ss2_glia.masts_inlocal.Sex.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Sex) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Sex.filt1$alpha - ss2_glia.masts_inlocal.Sex.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,1.1))
text(x=0:20*1.2+0.65,y=test_quant.Sex[,1]+0.02*test_quant.Sex[,1]/abs(test_quant.Sex[,1]),round(test_quant.Sex[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Sex_Processed',label = F) + labs(title = "ss2_glia Sex") +
coord_fixed(ratio=0.97),
cols = 1)
nuclei with Sex info
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Sex_Processed)
##
## F M
## 381 693
# quantile with step 0.05
test_quant.Sex <- data.frame(quantile(ss2_glia.masts_inlocal.Sex_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Sex_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Sex) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Sex_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Sex_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,1.1))
text(x=0:20*1.2+0.65,y=test_quant.Sex[,1]+0.02*test_quant.Sex[,1]/abs(test_quant.Sex[,1]),round(test_quant.Sex[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Sex_Processed',label = F, pt.size = 0.55,
cells = grep("Glia_1",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Sex_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
nuclei with Sex info
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Sex_Processed)
##
## F M
## 700 1264
# quantile with step 0.05
test_quant.Sex <- data.frame(quantile(ss2_glia.masts_inlocal.Sex_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Sex_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Sex) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Sex_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Sex_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,1.1))
text(x=0:20*1.2+0.65,y=test_quant.Sex[,1]+0.028*test_quant.Sex[,1]/abs(test_quant.Sex[,1]),round(test_quant.Sex[,1],3),cex=0.7)
here Two F specific genes strangely get too low padjD to be zero, so not plotted.
(ss2_glia.masts_inlocal.Sex_Glia23.filt2 %>% filter(contrast == "identF"))[,c("gene","contrast","coefD","padjD")]
## gene contrast coefD padjD
## 1 Xist identF 9.9678890 0.000000e+00
## 2 Tsix identF 8.3296312 0.000000e+00
## 3 Zfp382 identF 1.0967016 1.420307e-25
## 4 P2rx1 identF 1.0133499 3.812840e-22
## 5 Tmem181b-ps identF 0.9796084 1.376305e-19
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(rep("grey",2),glia.colors.inlocal[3:4])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Sex_Processed',label = F, pt.size = 0.55,
cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Sex_Glia2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
nuclei with Time info
table(ss2_glia.seur@meta.data$Time_Processed)
##
## 7AM 7PM
## 1492 1390
# quantile with step 0.05
test_quant.Time <- data.frame(quantile(ss2_glia.masts_inlocal.Time.filt1$alpha - ss2_glia.masts_inlocal.Time.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Time) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Time.filt1$alpha - ss2_glia.masts_inlocal.Time.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.45))
text(x=0:20*1.2+0.65,y=test_quant.Time[,1]+0.012*test_quant.Time[,1]/abs(test_quant.Time[,1]),round(test_quant.Time[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Time_Processed',label = F) + labs(title = "ss2_glia Time") +
coord_fixed(ratio=0.95),
cols = 1)
nuclei with Time info
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Time_Processed)
##
## 7AM 7PM
## 514 511
# quantile with step 0.05
test_quant.Time <- data.frame(quantile(ss2_glia.masts_inlocal.Time_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Time_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Time) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Time_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Time_Glia1.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.5))
text(x=0:20*1.2+0.65,y=test_quant.Time[,1]+0.02*test_quant.Time[,1]/abs(test_quant.Time[,1]),round(test_quant.Time[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Time_Processed',label = F, pt.size = 0.55,
cells = grep("Glia_1",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Time_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
nuclei with Time info
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Time_Processed)
##
## 7AM 7PM
## 978 879
# quantile with step 0.05
test_quant.Time <- data.frame(quantile(ss2_glia.masts_inlocal.Time_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Time_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)))
colnames(test_quant.Time) <- "alpha-ref_alpha"
#test_quant
# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Time_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Time_Glia23.filt1$ref_alpha,
probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.4))
text(x=0:20*1.2+0.65,y=test_quant.Time[,1]+0.012*test_quant.Time[,1]/abs(test_quant.Time[,1]),round(test_quant.Time[,1],3),cex=0.7)
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
cols = c(rep("grey",2),glia.colors.inlocal[3:4])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Time_Processed',label = F, pt.size = 0.55,
cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) +
labs(title = "ss2_glia Time_Glia_2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)
ss2_glia part ends here.